library(rJava)
library(xlsx)
library(ggplot2)
library(psych)
library(grid)
library(gridExtra)

#df<-read.xlsx("dataset.xlsx", 1)
# Load the raw table and keep only the rows used by the analysis below.
df <- read.table("dataset.csv", sep = ",", header = TRUE)

# Drop rows flagged exclude == "yes". A logical mask is used instead of
# df[-which(...), ]: when no row is flagged, which() returns integer(0) and
# the negative index would select zero rows instead of all of them.
# Rows whose `exclude` is NA are kept, exactly as before.
df1 <- df[!(df$exclude %in% "yes"), ]

# Restrict to records whose source is "mae-sot".
df2 <- df1[df1$source %in% "mae-sot", ]

# Manual correction of a single record.
# NOTE(review): 75 is a row position within df2; it silently points at a
# different record if dataset.csv or the filters above change. Consider
# selecting the record by an identifier column instead.
# zygosity is coerced to character first so the assignment also works when
# read.table() produced a factor that has no "homozygous" level.
df2$zygosity <- as.character(df2$zygosity)
df2$zygosity[75] <- "homozygous"

# Grouping used on the x axis and in the ANOVAs: every row starts as
# "hemi/homozygous", heterozygous rows are relabelled, and rows whose
# mutation is "WT" override both. rep() gives a plain character vector
# (array() would store a 1-d array in the data frame).
df2$type <- rep("hemi/homozygous", nrow(df2))
df2$type[df2$zygosity %in% "heterozygous"] <- "heterozygous"
df2$type[df2$mutation %in% "WT"] <- "WT"


# Collapse an Hb status column to two labels.
#
# x           factor or character vector of raw status values.
# other_label label given to every non-missing value that is not normal.
#
# Returns a character vector the same length as `x`: "Normal" (and an
# already recoded "Hb Normal") becomes "Hb Normal", every other non-missing
# value becomes `other_label`, and NA stays NA.
#
# as.character(x) is used rather than as.character(levels(x))[x]: the latter
# only works for factors, and turns the whole column into NA when the column
# was read as character (the read.table() default since R 4.0.0).
recode_hb_status <- function(x, other_label) {
  x <- as.character(x)
  is_normal <- x %in% c("Normal", "Hb Normal")
  x[!is_normal & !is.na(x)] <- other_label
  x[is_normal] <- "Hb Normal"
  x
}

df2$Hb_typing <- recode_hb_status(df2$Hb_typing, "Hb Mutated")
df2$New_Hb_abnormal <- recode_hb_status(df2$New_Hb_abnormal, "Hb Abnormal")



# Box plots per zygosity group (`type`), filled by Hb status, stacked in a
# single column: MCV by Hb_typing, reticulocytes by Hb_typing, and
# reticulocytes by New_Hb_abnormal.

# Shared look: legend above the panel, no legend title.
legend_on_top <- theme(
  legend.position = "top",
  legend.title = element_blank()
)

P1 <- ggplot(df2, aes(x = type, y = MCV)) +
  geom_boxplot(aes(fill = factor(Hb_typing))) +
  legend_on_top +
  labs(x = "")

P2 <- ggplot(df2, aes(x = type, y = Retics)) +
  geom_boxplot(aes(fill = factor(Hb_typing))) +
  legend_on_top +
  labs(x = "", y = "reticulocytes")

P3 <- ggplot(df2, aes(x = type, y = Retics)) +
  geom_boxplot(aes(fill = factor(New_Hb_abnormal))) +
  legend_on_top +
  labs(x = "", y = "reticulocytes")

grid.arrange(P1, P2, P3, ncol = 1)

# Across zygosities ----
# One-way ANOVA of reticulocytes (B1) and MCV (B2) by zygosity group,
# restricted to rows labelled "Hb Abnormal" in New_Hb_abnormal, each followed
# by Tukey HSD pairwise comparisons (C1, C2).
#
# The subset is materialised as its own data frame and passed through
# `data =`, so the model term is named `type` rather than carrying an index
# expression, and the row selection no longer reuses the name `nor` (which
# held the *normal* rows earlier in the script).
hb_abnormal <- df2[df2$New_Hb_abnormal %in% "Hb Abnormal", ]

B1 <- aov(Retics ~ type, data = hb_abnormal)
B2 <- aov(MCV ~ type, data = hb_abnormal)
C1 <- TukeyHSD(B1)
C2 <- TukeyHSD(B2)


# Within zygosities ----
# Combined group labels "<type> <Hb status>", one per status column, so that
# Tukey HSD reports every pairwise contrast, including those between Hb
# statuses inside the same zygosity group.
# Rows with a missing Hb status get a label ending in "NA" and therefore form
# their own group.
df2$fig1A <- paste(df2$type, df2$Hb_typing, sep = " ")
df2$fig1C <- paste(df2$type, df2$New_Hb_abnormal, sep = " ")

# Grouped by type x Hb_typing. `data = df2` keeps the model terms named
# `fig1A` / `fig1C` instead of `df2$fig1A` / `df2$fig1C`.
B3 <- aov(MCV ~ fig1A, data = df2)
B4 <- aov(Retics ~ fig1A, data = df2)
C3 <- TukeyHSD(B3)
C4 <- TukeyHSD(B4)

# Grouped by type x New_Hb_abnormal.
B5 <- aov(Retics ~ fig1C, data = df2)
C5 <- TukeyHSD(B5)




